nldas_nest <- read_rds("data/nldas_nest.rds")
nldas_nest
## # A tibble: 100 x 3
##    installation    variable           data
##    <chr>           <chr>    <list<df[,2]>>
##  1 eglin_afb       tmp_f     [262,957 x 2]
##  2 fort_benning_ga tmp_f     [262,957 x 2]
##  3 fort_bliss      tmp_f     [262,957 x 2]
##  4 fort_bragg      tmp_f     [262,957 x 2]
##  5 fort_campbell   tmp_f     [262,957 x 2]
##  6 fort_carson     tmp_f     [262,957 x 2]
##  7 fort_drum       tmp_f     [262,957 x 2]
##  8 fort_gordon     tmp_f     [262,957 x 2]
##  9 fort_hood       tmp_f     [262,957 x 2]
## 10 fort_jackson    tmp_f     [262,957 x 2]
## # ... with 90 more rows

Create zoo timeseries

# Function to convert hourly `dttm` to a time series (zoo) object

make_ts = function(df) {
  zoo::zoo(x = df[["value"]],
           order.by = df[["local_dttm"]],
           frequency = 24)
}



# Map zoo ts function over each location
TMP_ts <-
  nldas_nest %>% 
    filter(variable == "TMP") %>%
    mutate(data_ts = 
      purrr::map(data, make_ts))

Create xts timeseries

# Create `xts` time series


tmp_f_ts <-
  nldas_nest %>% 
    filter(variable == "tmp_f") %>%
    mutate(data_ts = 
      purrr::map(data, timetk::tk_xts))


tmp_f_ts
## # A tibble: 25 x 4
##    installation    variable           data data_ts
##    <chr>           <chr>    <list<df[,2]>> <list> 
##  1 eglin_afb       tmp_f     [262,957 x 2] <xts>  
##  2 fort_benning_ga tmp_f     [262,957 x 2] <xts>  
##  3 fort_bliss      tmp_f     [262,957 x 2] <xts>  
##  4 fort_bragg      tmp_f     [262,957 x 2] <xts>  
##  5 fort_campbell   tmp_f     [262,957 x 2] <xts>  
##  6 fort_carson     tmp_f     [262,957 x 2] <xts>  
##  7 fort_drum       tmp_f     [262,957 x 2] <xts>  
##  8 fort_gordon     tmp_f     [262,957 x 2] <xts>  
##  9 fort_hood       tmp_f     [262,957 x 2] <xts>  
## 10 fort_jackson    tmp_f     [262,957 x 2] <xts>  
## # ... with 15 more rows
tmp_f_ts$data_ts[[1]] %>% head()
##                     value
## 1990-01-01 00:00:00 64.06
## 1990-01-01 01:00:00 62.47
## 1990-01-01 02:00:00 60.89
## 1990-01-01 03:00:00 59.31
## 1990-01-01 04:00:00 57.76
## 1990-01-01 05:00:00 56.21

Example dynamic graph of temperature time series

(Select segment with cursor to zoom)

# Sample time series dynamic graph

tmp_f_ts$data_ts[[1]] %>% 
  dygraph(main = tmp_f_ts$installation[1]) %>% 
  dySeries("value", label = "Temperature (°F)") %>%
  dyLegend(show = "always", hideOnMouseOut = FALSE)

Model time series

Exponential Smoothing ETS (Error, Trend, Seasonal) model function

# Ref: https://cran.rstudio.com/web/packages/sweep/vignettes/SW01_Forecasting_Time_Series_Groups.html

# Map the Exponential Smoothing ETS (Error, Trend, Seasonal) model function, ets, from the forecast package

tmp_f_fit <-
  tmp_f_ts %>%
  slice(1:4) %>% 
   mutate(data_ts = 
      purrr::map(data_ts, timetk::tk_ts)) %>% 
   mutate(fit_ets = map(data_ts, forecast::ets))

# View ETS model accuracies
tmp_f_fit %>%
    mutate(glance = map(fit_ets, sweep::sw_glance)) %>%
    unnest(glance)
## # A tibble: 4 x 17
##   installation variable          data data_ts fit_ets model.desc sigma  logLik
##   <chr>        <chr>    <list<df[,2]> <list>  <list>  <chr>      <dbl>   <dbl>
## 1 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   ETS(A,Ad,~ 0.910 -1.62e6
## 2 fort_bennin~ tmp_f    [262,957 x 2] <ts>    <ets>   ETS(A,Ad,~ 0.987 -1.64e6
## 3 fort_bliss   tmp_f    [262,957 x 2] <ts>    <ets>   ETS(A,Ad,~ 0.854 -1.60e6
## 4 fort_bragg   tmp_f    [262,957 x 2] <ts>    <ets>   ETS(A,Ad,~ 1.03  -1.65e6
## # ... with 9 more variables: AIC <dbl>, BIC <dbl>, ME <dbl>, RMSE <dbl>,
## #   MAE <dbl>, MPE <dbl>, MAPE <dbl>, MASE <dbl>, ACF1 <dbl>
# Augmented fitted and residual values

augment_fit_ets <- tmp_f_fit %>%
    mutate(augment = map(fit_ets, sweep::sw_augment, timetk_idx = TRUE, rename_index = "time")) %>%
    unnest(augment)

augment_fit_ets 
## # A tibble: 1,051,828 x 9
##    installation variable          data data_ts fit_ets time               
##    <chr>        <chr>    <list<df[,2]> <list>  <list>  <dttm>             
##  1 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 00:00:00
##  2 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 01:00:00
##  3 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 02:00:00
##  4 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 03:00:00
##  5 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 04:00:00
##  6 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 05:00:00
##  7 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 06:00:00
##  8 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 07:00:00
##  9 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 08:00:00
## 10 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 09:00:00
## # ... with 1,051,818 more rows, and 3 more variables: .actual <dbl>,
## #   .fitted <dbl>, .resid <dbl>
# Plot the residuals (slow)

#  augment_fit_ets %>%
#      filter(installation == "eglin_afb") %>% 
#      ggplot(aes(x = time, y = .resid)) +
#      geom_hline(yintercept = 0, color = "grey40") +
#      geom_line() +
#      geom_smooth(method = "loess") +
#      labs(title = "Temperature (°F)",
#           subtitle = "ETS Model Residuals") + 
#      theme_bw() 

# Create decompositions

tmp_f_fit %>%
    mutate(decomp = map(fit_ets, sweep::sw_tidy_decomp, timetk_idx = TRUE, rename_index = "time")) %>%
    unnest(decomp)
## # A tibble: 1,051,828 x 9
##    installation variable          data data_ts fit_ets time               
##    <chr>        <chr>    <list<df[,2]> <list>  <list>  <dttm>             
##  1 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 00:00:00
##  2 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 01:00:00
##  3 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 02:00:00
##  4 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 03:00:00
##  5 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 04:00:00
##  6 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 05:00:00
##  7 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 06:00:00
##  8 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 07:00:00
##  9 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 08:00:00
## 10 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   1990-01-01 09:00:00
## # ... with 1,051,818 more rows, and 3 more variables: observed <dbl>,
## #   level <dbl>, slope <dbl>
# Forecasting the model

tmp_f_fit_fcast <- tmp_f_fit %>%
    mutate(fcast_ets = map(fit_ets, forecast, h = 336),
           sweep = map(fcast_ets, sweep::sw_sweep, fitted = FALSE, timetk_idx = TRUE)) %>%
    unnest(sweep)
    
tmp_f_fit_fcast
## # A tibble: 1,053,172 x 13
##    installation variable          data data_ts fit_ets fcast_ets
##    <chr>        <chr>    <list<df[,2]> <list>  <list>  <list>   
##  1 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  2 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  3 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  4 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  5 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  6 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  7 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  8 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
##  9 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
## 10 eglin_afb    tmp_f    [262,957 x 2] <ts>    <ets>   <forecas~
## # ... with 1,053,162 more rows, and 7 more variables: index <dttm>, key <chr>,
## #   value <dbl>, lo.80 <dbl>, lo.95 <dbl>, hi.80 <dbl>, hi.95 <dbl>
# Plot forecast

tmp_f_fit_fcast %>%
    filter(installation == "eglin_afb",
           index > as.Date("2019-05-01")) %>% 
    ggplot(aes(x = index, y = value, color = key)) +
    geom_ribbon(aes(ymin = lo.95, ymax = hi.95), 
                fill = "#D5DBFF", color = NA, size = 0) +
    geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key), 
                fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
    geom_line() +
    labs(title = "Temperature (°F)",
         subtitle = "ETS Model Forecasts",
         x = "", y = "Temperature (°F)") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Multiple seasonal decomposition

# Multiple seasonal decomposition


tmp_f_fit_mstl <-
  tmp_f_fit %>% 
    mutate(fit_mstl =
             map(data_ts, forecast::mstl)) 

tmp_f_fit_mstl$fit_mstl[[1]] %>% 
  autoplot()